package com.xxxx.spark.job

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Computes PV (page views: hits per site) and UV (unique visitors:
 * distinct IPs per site) from a tab-separated access log and prints
 * both result sets to stdout.
 *
 * Log record layout (tab-separated), e.g.:
 *   146.1.30.98	河南	2017-10-10	1512012307078	5263761960810313758	www.jd.com	Regist
 * field(0) = client IP, field(5) = site domain.
 */
object PVUV {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local").setAppName("uv")
    val sc = new SparkContext(conf)
    try {
      val file = sc.textFile("data/pvuvdata")
      // Two independent actions consume this RDD; cache it so the
      // input file is read and split only once.
      file.cache()

      // PV: one (site, 1) pair per log line; countByKey tallies hits per site.
      file.map(line => (line.split("\t")(5), 1))
        .countByKey()
        .foreach(println)

      // UV: (site, ip) pairs deduplicated, then counted per site —
      // i.e. number of distinct visitor IPs for each site.
      file.map { line =>
        val fields = line.split("\t")
        (fields(5), fields(0))
      }
        .distinct()
        .countByKey()
        .foreach(println)
    } finally {
      // Always release the SparkContext, even if a job above fails;
      // the original leaked it.
      sc.stop()
    }
  }
}
